﻿/* 
%include "/projects/hsieh_project/proj_201809/code_1_data/data_0_fknaics_sum.sas" /source2;

Author: Adarsh Kumar
Objective: Industry level stats (total employment, top 10% employment, top 10% establishment count), 
where industry is defined using fknaics, naics, sic.
*/


/*---------------------------------------------------------*/
/* Define macro that aggregates to desired level */
/*---------------------------------------------------------*/

/*
Macro: fknaics_ind_sum

Purpose:
  Build a year x industry summary data set. For every year-industry cell it
  sums a count variable, employment (worker) and a firm count, both over all
  rows and over the rows flagged as belonging to top firms for each requested
  percent cutoff.

Parameters:
  dt_in     = Input data set. The code reads year, firmnum, worker, the
              variable(s) in group_var and, when ivar is not worker, the
              variable named by ivar.
  dt_out    = Output data set (one row per year x group_var).
  l_perc    = Blank separated list of percent cutoffs (default 10). Each value
              is divided by 100 and compared with the percentile variable, and
              is also appended to the output variable names, so it should be a
              whole number.
  group_var = Industry variable(s) used in the BY statements.
  years     = List of years placed inside year in ( ... ).
  ivar      = worker, or the name of a market variable. Selects how the
              percentile is computed and what the count variable means.
  ivaro     = Name of the count variable for the market case. Its original
              value is also used as a tag inside the top variable names. In the
              worker case the count variable is always n_est.
  vperc     = Passed to m_perc_by_var as var_out. This macro then reads the
              variable named vperc followed by _perc, which is presumably
              created by m_perc_by_var. TODO confirm against that macro.

Output variables (all summed by year x group_var):
  count variable, emp_ind, n_ind, and for every cutoff P the three variables
  <count>_<tag>P, emp_ind_<tag>P and n_ind_<tag>P.

Side effects:
  Creates or overwrites WORK data sets dt_var, dt_perc, dt_indf and (market
  case) dt_var_for_perc. Depends on the external macro m_perc_by_var.
*/
%macro fknaics_ind_sum(dt_in = , dt_out = ,l_perc = 10, group_var = , years = , ivar = , ivaro = ,vperc = );

/*
Keep helper macro variables local so that same-named variables in the
calling or global scope are never overwritten by this macro.
*/
%local top l_ind_var i_list i_perc;

%put Input &dt_in.;
%put Output &dt_out.;
%put Percentile &l_perc.;
%put Industry Variable &group_var.;
%put Years of Analysis &years.;
%put Top Firm Definition &ivar.;
%put # of Mkts variable label &ivaro.;

/*Subset data to specific years (i.e. fknaics = 1977, 2013; naics = 2002, 2013; sic = 1977, 2001) */
data dt_var;
  set &dt_in.;
  if year in (&years.);
run;

/*If top-firm defined by employment: percentile computed on worker */
%if "&ivar." = "worker" %then %do;
  %put Employment used to define top firms;
  %m_perc_by_var(ds_in=dt_var, ds_out=dt_perc, var_in=worker, var_by=%bquote(year &group_var.), var_unit=firmnum, var_out=&vperc., var_runif=runif);
%end;
%else %do;
  /*
  If top-firm defined by mkt: keep one row per year-industry-firm-mkt and
  set the count variable to 1 on it, then compute the percentile on that
  count variable.
  */
  %put Mkt variable used to define top firm;
  %put Keeping first obs of Year Ind Firm Mkt variable;

  proc sort data=dt_var;
    by year &group_var. firmnum &ivar.;
  run;

  data dt_var_for_perc;
    set dt_var;
    by year &group_var. firmnum &ivar.;
    if first.&ivar.;
    &ivaro. = 1;
  run;

  proc sort data = dt_var_for_perc;
    by year &group_var. firmnum &ivar.;
  run;

  %m_perc_by_var(ds_in = dt_var_for_perc, ds_out = dt_perc, var_in = &ivaro., var_by=%bquote(year &group_var.), var_unit=firmnum, var_out=&vperc., var_runif=runif);
%end;

/*
Percentiles have been computed at the industry (fknaics etc) - firm level. Now to merge:
dt_var is at the establishment level. dt_perc is at the ind-firm level
*/
proc sort data=dt_var;
  by year &group_var. firmnum;
run;

proc sort data=dt_perc;
  by year &group_var. firmnum;
run;

/*
Note: dt_indf will be at the establishment level (industry-firm-est level)
Need to be careful with finding # of firms, # of est (need to tag each unique obs)
*/
data dt_indf;
  merge dt_var dt_perc;
  by year &group_var. firmnum;
run;

/*Tagging first observation of firmnum-industry to find number of firms */
proc sort data=dt_indf;
  by year &group_var. firmnum;
run;

data dt_indf;
  set dt_indf;
  by year &group_var. firmnum;
  n_ind = first.firmnum;
run;

/*
Preserve the caller-supplied label before ivaro may be replaced below; it is
used as the tag inside the names of the top-firm variables.
*/
%let top = &ivaro.;

%if "&ivar." = "worker" %then %do;
  /* Employment case: every row counts as one establishment. */
  %let ivaro = n_est;
  %put Initiating variable &ivaro. to calculate n_est for top firms by employment;

  data dt_indf;
    set dt_indf;
    emp_ind = worker;
    &ivaro. = 1;
  run;
%end;
%else %do;
  /* Market case: count each year-industry-firm-mkt combination once. */
  %put Tagging first observation of ind-firm-mkt with &ivaro.;

  proc sort data = dt_indf;
    by year &group_var. firmnum &ivar.;
  run;

  data dt_indf;
    set dt_indf;
    by year &group_var. firmnum &ivar.;
    emp_ind = worker;
    &ivaro. = first.&ivar.;
  run;
%end;

/*
--------------------------------------------------------------------------------
Count total number of cities, employment, # firms for top firms
*/
%let l_ind_var = &ivaro. emp_ind n_ind;

/* Walk through the list of percent cutoffs, one word at a time. */
%let i_list = 1;
%do %while (%scan(%bquote(&l_perc), &i_list) ~= );
  %let i_perc=%scan(%bquote(&l_perc), &i_list.);
  %put Top &i_perc.% Industry Firms;

  data dt_indf;
    set dt_indf;
    &ivaro._&top.&i_perc. = 0;
    emp_ind_&top.&i_perc. = 0;
    n_ind_&top.&i_perc. = 0;

    /*
    A missing numeric compares lower than any number in SAS, so without the
    explicit missing check a row with no percentile would be counted as a
    top firm. Rows without a percentile keep the zero values set above.
    */
    if not missing(&vperc._perc) and &vperc._perc <= %sysevalf(&i_perc. / 100) then
      do;
        &ivaro._&top.&i_perc. = &ivaro.;
        emp_ind_&top.&i_perc. = emp_ind;
        n_ind_&top.&i_perc. = n_ind;
      end;
  run;

  /* Accumulate the names of the variables to be summed at the end. */
  %let l_ind_var = &l_ind_var. &ivaro._&top.&i_perc. emp_ind_&top.&i_perc. n_ind_&top.&i_perc.;
  %let i_list = %eval(&i_list + 1);
%end;

%put List of Variables for Top Firms;
%put &l_ind_var.;

/*
--------------------------------------------------------------------------------
Aggregate to year-industry level
*/

proc sort data=dt_indf;
  by year &group_var.;
run;

proc means data=dt_indf noprint;
  by year &group_var.;
  output out=&dt_out.(drop = _type_ _freq_) sum(&l_ind_var.) = &l_ind_var.;
run;

%mend fknaics_ind_sum;

/* End of file */
